library(magrittr)
library(tidyr)
library(dplyr)
library(plotly)
library(ggrepel)

1 Initialisation and data import

source("scripts/init.R")
source("scripts/import_data.R")
source("scripts/data_phenot_get_parameters_std.R")
source("scripts/data_phenot_outliers_exclusion_std.R")

glimpse(data_phenot_parms_clean)
## Rows: 1,981
## Columns: 6
## $ robot_id    <chr> "R20-20210930-001", "R20-20210930-001", "R20-20210930-001"~
## $ strain_name <chr> "LB-PD7-T1-1", "LB-PD7-T1-1", "LB-PD7-T1-1", "LB-PD7-T1-1"~
## $ bloc        <fct> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3~
## $ bloc_month  <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ parameter   <chr> "cell_t0", "co2max", "death_prct", "lag", "pop_size", "tvm~
## $ value       <dbl> 9.810000e+05, 2.223684e+01, 1.330000e+01, 9.868000e+00, 6.~

2 Phenotypic

data_phenot_parms_clean_wide <- data_phenot_parms_clean %>%
  pivot_wider(names_from = "parameter",
              values_from = "value") %>%
  group_by(strain_name) %>%
  mutate(replicate = paste0(strain_name, ".", row_number())) %>%
  column_to_rownames(var = "replicate") %>%
  ungroup() %>%
  mutate(baker_id = strains$baker_id[match(strain_name, strains$strain_name)])
  

data_phenot_parms_clean_mat <- data_phenot_parms_clean_wide %>%
  select(names(.)[names(.) %in% data_phenot_parms_clean$parameter]) %>%
  as.matrix()
  

pca_pheno <- prcomp(data_phenot_parms_clean_mat, rank. = 3, scale. = T)

components <- pca_pheno[["x"]]
components <- data.frame(components)
components$PC2 <- -components$PC2
components$PC3 <- -components$PC3

tot_explained_variance_ratio <- summary(pca_pheno)[["importance"]]['Proportion of Variance',]
tot_explained_variance_ratio <- 100 * sum(tot_explained_variance_ratio)


fig <- plot_ly(components, x = ~PC1, y = ~PC2, z = ~PC3, 
               color = ~data_phenot_parms_clean_wide$baker_id, 
               colors = c('#636EFA','#EF553B','#00CC96','violet') ) %>%
  add_markers(size = 12)


fig <- fig %>%
  layout(
    scene = list(bgcolor = "#e5ecf6")
)

fig
plot <- components %>%
  mutate(baker_id = data_phenot_parms_clean_wide$baker_id,
         strain_name = data_phenot_parms_clean_wide$strain_name) %>%
  ggplot() +
  aes(x = PC1, y = PC2, fill = baker_id, color = baker_id, group = strain_name) +
  geom_point() +
  geom_polygon(alpha = 0.2, color = alpha("white",0)) +
  geom_text_repel(aes(label = strain_name , x = )) +
  theme_minimal()
  
plot

plot +  facet_wrap(~ baker_id)

3 Clustering

library(dendextend)

dm <- dist(x = scale(data_phenot_parms_clean_mat), method = "euclidean")
hc <- hclust(dm, method = "ward.D")
dg <- as.dendrogram(hc)

colorCodes <- c(LB="red", MB="green", AM="blue", PDC="yellow")
labels_colors(dg) <- colorCodes[data_phenot_parms_clean_wide$baker_id][order.dendrogram(dg)]
par(mar = c(1,1,1,10))

dg %>% 
  set("labels_cex",0.7) %>% 
  plot(., horiz=T,  which.plots=2)